package com.demo.util;

import com.alibaba.fastjson.JSON;
import com.demo.bean.*;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Utility for parsing patent XML documents into {@link Patent} beans.
 *
 * <p>The source XML is first copied (without its leading DOCTYPE declaration) into a
 * sibling folder by {@link #deleteDtd(String)}, and the copy is then parsed with dom4j.
 */
public class PatentUtil {

    /** Literal text that ends the DOCTYPE declaration stripped by {@link #deleteDtd(String)}. */
    private static final String DTD_END_MARKER = ".dtd\">";

    /** Value of the {@code lang} attribute that marks the English variant of an element. */
    private static final String ENGLISH = "EN";

    /** Input used by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_SAMPLE_PATH =
            "D:\\project\\springboot-neo4j-master\\src\\main\\resources\\patent\\PAC-1001_EP-1233512-A2.xml";

    /**
     * Parses a patent XML file.
     *
     * <p>Every section of the document is treated as optional: a missing element leaves the
     * corresponding field unset (or an empty list) instead of aborting the whole parse.
     * Failures to read or parse the file are reported on standard error and result in a
     * partially filled (possibly empty) bean; no exception is thrown for them.
     *
     * @param path path of the XML file
     * @return the populated bean, never {@code null}
     */
    public static Patent parseXml(String path) {
        Patent patent = new Patent();
        String resultPath = deleteDtd(path);
        try {
            SAXReader reader = new SAXReader();
            Document doc = reader.read(new File(resultPath));
            Element patentDocument = doc.getRootElement();

            Element bibliographicData = child(patentDocument, "bibliographic-data");
            readPublicationReference(patent, bibliographicData);
            readApplicationReference(patent, bibliographicData);
            readPublicAvailabilityDate(patent, bibliographicData);
            readTermOfGrant(patent, bibliographicData);

            Element technicalData = child(bibliographicData, "technical-data");
            readTitle(patent, technicalData);
            readCitations(patent, technicalData);
            patent.setIpcrs(textsOf(children(child(technicalData, "classifications-ipcr"), "classification-ipcr")));
            patent.setEcla(textsOf(children(child(technicalData, "classification-ecla"), "classification-symbol")));

            Element parties = child(bibliographicData, "parties");
            readApplicants(patent, parties);
            readInventors(patent, parties);

            readAbstract(patent, patentDocument);
            readDescription(patent, patentDocument);
            readClaims(patent, patentDocument);

            Element copyright = child(patentDocument, "copyright");
            if (copyright != null) {
                patent.setCopyright(copyright.getTextTrim());
            }
        } catch (Exception e) {
            // Boundary catch kept on purpose: callers rely on always getting a bean back.
            System.err.println("Failed to parse patent XML: " + resultPath);
            e.printStackTrace();
        }
        return patent;
    }

    /** Stores the publication reference (country, number, kind, date) as a JSON string. */
    private static void readPublicationReference(Patent patent, Element bibliographicData) {
        Element documentId = child(child(bibliographicData, "publication-reference"), "document-id");
        if (documentId == null) {
            return;
        }
        // Fields are looked up by name so that an absent child cannot shift the others.
        PublicationReference reference = new PublicationReference();
        reference.setCountry(childText(documentId, "country"));
        reference.setDocNumber(childText(documentId, "doc-number"));
        reference.setKind(childText(documentId, "kind"));
        reference.setDate(childText(documentId, "date"));
        patent.setPublicNum(JSON.toJSONString(reference));
    }

    /** Stores the application reference (country, number, kind, date) as a JSON string. */
    private static void readApplicationReference(Patent patent, Element bibliographicData) {
        Element documentId = child(child(bibliographicData, "application-reference"), "document-id");
        if (documentId == null) {
            return;
        }
        ApplicationReference reference = new ApplicationReference();
        reference.setCountry(childText(documentId, "country"));
        reference.setDocNumber(childText(documentId, "doc-number"));
        reference.setKind(childText(documentId, "kind"));
        reference.setDate(childText(documentId, "date"));
        patent.setApplyNum(JSON.toJSONString(reference));
    }

    /** Stores the date found under dates-of-public-availability/intention-to-grant-date. */
    private static void readPublicAvailabilityDate(Patent patent, Element bibliographicData) {
        Element availability = child(bibliographicData, "dates-of-public-availability");
        Element date = child(child(availability, "intention-to-grant-date"), "date");
        if (date != null) {
            patent.setDatesOfPublicAvailability(date.getTextTrim());
        }
    }

    /** Stores all lapse-of-patent entries of term-of-grant as a JSON array string. */
    private static void readTermOfGrant(Patent patent, Element bibliographicData) {
        Element termOfGrant = child(bibliographicData, "term-of-grant");
        if (termOfGrant == null) {
            return;
        }
        List<LapseOfPatent> lapses = new ArrayList<>();
        for (Element lapseElement : children(termOfGrant, "lapse-of-patent")) {
            Element documentId = lapseElement.element("document-id");
            LapseOfPatent lapse = new LapseOfPatent();
            lapse.setCountry(childText(documentId, "country"));
            lapse.setDate(childText(documentId, "date"));
            lapses.add(lapse);
        }
        patent.setTermOfGrant(JSON.toJSONString(lapses));
    }

    /** Stores the English invention title; if several are present the last one wins. */
    private static void readTitle(Patent patent, Element technicalData) {
        for (Element title : children(technicalData, "invention-title")) {
            if (isEnglish(title)) {
                patent.setTitle(title.getTextTrim());
            }
        }
    }

    /** Stores patent and non-patent citations as a JSON string when a citations element exists. */
    private static void readCitations(Patent patent, Element technicalData) {
        Element citations = child(technicalData, "citations");
        if (citations == null) {
            return;
        }
        List<Patcit> patcits = new ArrayList<>();
        for (Element patcitElement : children(child(citations, "patent-citations"), "patcit")) {
            Element documentId = patcitElement.element("document-id");
            Patcit patcit = new Patcit();
            patcit.setCountry(childText(documentId, "country"));
            patcit.setDocNumber(childText(documentId, "doc-number"));
            patcit.setKind(childText(documentId, "kind"));
            patcits.add(patcit);
        }

        // Collect every nplcit entry, not only the first one.
        List<String> nplcits = new ArrayList<>();
        for (Element nplcitElement : children(child(citations, "non-patent-citations"), "nplcit")) {
            String text = childText(nplcitElement, "text");
            if (text != null) {
                nplcits.add(text);
            }
        }

        Citation citation = new Citation();
        citation.setPatcits(patcits);
        citation.setNplcits(nplcits);
        patent.setCitation(JSON.toJSONString(citation));
    }

    /** Stores each applicant (name, last name, city, country) as a JSON string. */
    private static void readApplicants(Patent patent, Element parties) {
        List<String> applicants = new ArrayList<>();
        for (Element applicantElement : children(child(parties, "applicants"), "applicant")) {
            Element addressBook = applicantElement.element("addressbook");
            Element address = child(addressBook, "address");
            Applicant applicant = new Applicant();
            String name = childText(addressBook, "name");
            if (name != null) {
                applicant.setName(name);
            }
            String lastName = childText(addressBook, "last-name");
            if (lastName != null) {
                applicant.setLastName(lastName);
            }
            String city = childText(address, "city");
            if (city != null) {
                applicant.setCity(city);
            }
            String country = childText(address, "country");
            if (country != null) {
                applicant.setCountry(country);
            }
            applicants.add(JSON.toJSONString(applicant));
        }
        patent.setApplicants(applicants);
    }

    /** Stores each inventor (name, last name, city, country) as a JSON string. */
    private static void readInventors(Patent patent, Element parties) {
        List<String> inventors = new ArrayList<>();
        for (Element inventorElement : children(child(parties, "inventors"), "inventor")) {
            Element addressBook = inventorElement.element("addressbook");
            Element address = child(addressBook, "address");
            Inventor inventor = new Inventor();
            String name = childText(addressBook, "name");
            if (name != null) {
                inventor.setName(name);
            }
            String lastName = childText(addressBook, "last-name");
            if (lastName != null) {
                inventor.setLastName(lastName);
            }
            String city = childText(address, "city");
            if (city != null) {
                inventor.setCity(city);
            }
            String country = childText(address, "country");
            if (country != null) {
                inventor.setCountry(country);
            }
            inventors.add(JSON.toJSONString(inventor));
        }
        patent.setInventors(inventors);
    }

    /** Stores the first paragraph of the English abstract; the last English abstract wins. */
    private static void readAbstract(Patent patent, Element patentDocument) {
        for (Element abstractElement : children(patentDocument, "abstract")) {
            if (isEnglish(abstractElement)) {
                String paragraph = childText(abstractElement, "p");
                if (paragraph != null) {
                    patent.setEnAbstract(paragraph);
                }
            }
        }
    }

    /** Stores the text of every direct child of each English description element. */
    private static void readDescription(Patent patent, Element patentDocument) {
        List<String> description = new ArrayList<>();
        for (Element descriptionElement : children(patentDocument, "description")) {
            if (isEnglish(descriptionElement)) {
                description.addAll(textsOf(children(descriptionElement)));
            }
        }
        patent.setDescription(description);
    }

    /** Stores the text of every claim-text nested inside the English claims element(s). */
    private static void readClaims(Patent patent, Element patentDocument) {
        List<String> claims = new ArrayList<>();
        for (Element claimsElement : children(patentDocument, "claims")) {
            if (!isEnglish(claimsElement)) {
                continue;
            }
            // ".//" keeps the search inside this claims element; a bare "//" would be
            // evaluated from the document root and pick up claim-text of other languages.
            List<?> claimTexts = claimsElement.selectNodes(".//claim-text");
            for (Object node : claimTexts) {
                claims.add(((Element) node).getTextTrim());
            }
        }
        patent.setClaim(claims);
    }

    /** Returns the first child with the given name, or {@code null} if parent or child is absent. */
    private static Element child(Element parent, String name) {
        return parent == null ? null : parent.element(name);
    }

    /** Returns the trimmed text of the named child, or {@code null} if parent or child is absent. */
    private static String childText(Element parent, String name) {
        Element element = child(parent, name);
        return element == null ? null : element.getTextTrim();
    }

    /** Returns all children with the given name; empty when the parent is {@code null}. */
    @SuppressWarnings("unchecked") // older dom4j releases return a raw List here
    private static List<Element> children(Element parent, String name) {
        if (parent == null) {
            return new ArrayList<Element>();
        }
        return parent.elements(name);
    }

    /** Returns all child elements; empty when the parent is {@code null}. */
    @SuppressWarnings("unchecked") // older dom4j releases return a raw List here
    private static List<Element> children(Element parent) {
        if (parent == null) {
            return new ArrayList<Element>();
        }
        return parent.elements();
    }

    /** Returns the trimmed text of each element, in order. */
    private static List<String> textsOf(List<Element> elements) {
        List<String> texts = new ArrayList<>(elements.size());
        for (Element element : elements) {
            texts.add(element.getTextTrim());
        }
        return texts;
    }

    /** Tells whether the element carries {@code lang="EN"}; a missing attribute is not English. */
    private static boolean isEnglish(Element element) {
        return ENGLISH.equals(element.attributeValue("lang"));
    }

    /**
     * Writes a copy of the XML file with everything up to and including the leading DOCTYPE
     * declaration removed (dom4j fails on these files when the DTD reference is present).
     *
     * <p>The copy is written as UTF-8 to {@code <parent>/<name-without-suffix>/<name>}.
     * Everything after the first {@code .dtd">} is kept verbatim, including line breaks;
     * if the marker does not occur the content is copied unchanged. When the source file
     * does not exist nothing is written. I/O errors are printed and otherwise ignored.
     *
     * @param path path of the source XML file
     * @return path of the copy (returned even when the copy could not be written)
     */
    public static String deleteDtd(String path) {
        File file = new File(path);
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        // A name without a (non-leading) dot has no suffix to cut off.
        String fileNameWithoutSuffix = dot <= 0 ? fileName : fileName.substring(0, dot);
        System.out.println(fileName);
        String parent = file.getParent();
        System.out.println(parent);

        // File(parent, child) tolerates a null parent, unlike string concatenation.
        File folder = new File(parent, fileNameWithoutSuffix);
        File newFile = new File(folder, fileName);

        if (file.exists()) {
            try {
                String content = readAll(file);
                folder.mkdirs();
                try (Writer writer = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(newFile), "UTF-8"))) {
                    writer.write(stripDtd(content));
                }
                System.out.println("文件写入内容完成");
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return newFile.getPath();
    }

    /** Reads the whole file as UTF-8 text, preserving line terminators. */
    private static String readAll(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        try (Reader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        }
        return content.toString();
    }

    /** Drops everything up to and including the first DOCTYPE end marker, if there is one. */
    private static String stripDtd(String content) {
        int markerStart = content.indexOf(DTD_END_MARKER);
        if (markerStart < 0) {
            return content;
        }
        return content.substring(markerStart + DTD_END_MARKER.length());
    }

    /**
     * Parses one patent file as a smoke test.
     *
     * @param args optional: path of the XML file to parse; a built-in sample path is used
     *             when omitted
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_SAMPLE_PATH;
        parseXml(path);
        System.out.println("完成");
    }
}
